# Working directory that holds the raw replication data files read below
# (all file paths in this script are relative to it).
# The fallback path is the original author's local folder. To run the script
# on another machine without editing it, set the environment variable
# D2D_DATA_DIR to the folder containing the data files; when the variable is
# unset or empty, the original path is used unchanged.
d2d_data_dir <- Sys.getenv("D2D_DATA_DIR", unset = "")
if (!nzchar(d2d_data_dir)) {
  d2d_data_dir <- "/Users/bencehamrak/Library/CloudStorage/OneDrive-CentralEuropeanUniversity(CEUGmbHHungarianBranchOffice)/Backup_data_2024/Research Projects/D2D"
}
setwd(d2d_data_dir)

library(dplyr)
library(haven)

# Replication files: Door-to-door campaigns in an electoral autocracy: Evidence from Hungary

## DATA FILE MERGING

# Input 1: precinct-level election results, 2018 general and 2019 EP
# elections, already matched across the two years.
results <- read.csv("precinct_results_2018_19.csv")
# Keep only rows flagged safe_18, i.e. precincts that did not change between
# 2018 and 2019 (10183 out of 10277 per the original note).
d2d_precinct <- filter(results, safe_18 == TRUE)

# Input 2: precinct-level voter-per-address data (Stata file, read via haven).
d2d_density <- read_dta("density.dta")

# Input 3: precinct-level canvassing data from Momentum's 2019 EP campaign.
d2d_canv <- read.csv("canvdat.csv")

## Build the shared join key `precinct_id` ("<city>_<precinct number>")
## in each of the three tables.

# Election results: city + 2019 precinct number.
d2d_precinct <- mutate(
  d2d_precinct,
  precinct_id = paste(city, szk19, sep = "_")
)

# Voter-per-address data: city + 2018 precinct number.
# NOTE(review): this key is built from szk18 while the other two tables use
# szk19 -- presumably fine because only precincts unchanged between 2018 and
# 2019 are kept in the results table; confirm the numbering is identical.
d2d_density <- mutate(
  d2d_density,
  precinct_id = paste(city, szk18, sep = "_")
)

# Canvassing data: city + 2019 precinct number.
d2d_canv <- mutate(
  d2d_canv,
  precinct_id = paste(city, szk19, sep = "_")
)

## Merging by precinct_id

# Step 1: observables. The inner join keeps only precincts that appear in
# both the election results and the voter-per-address data
# (10087 out of 10183 per the original note).
d2d_obs <- inner_join(d2d_precinct, d2d_density, by = "precinct_id")

# Step 2: final dataset. Precincts with and without campaign activity are
# both kept so outcomes can be compared across them.
d2d <- full_join(d2d_obs, d2d_canv, by = "precinct_id")

# Remove duplicated columns: everything the joins suffixed with ".y", plus
# the un-suffixed `city` column (presumably the canvassing table's copy --
# confirm against the data).
d2d <- dplyr::select(d2d, -matches("\\.y$"), -city)

# Strip the ".x" suffix from the surviving copies to restore their names.
d2d <- rename_with(d2d, function(nm) gsub("\\.x$", "", nm), ends_with(".x"))

# Drop rows with a missing `city`: per the original note, 9 canvassing
# precincts had no safe match (they were removed earlier from the results
# data, so they found no partner in the join).
d2d <- filter(d2d, !is.na(city))

#write.csv(d2d, "d2d.csv")

## END OF SCRIPT 